*************************************************************************************************
/*	Purpose: 									     
	-clean AVTMH, 1976								     
	-Attach 1950 ANES occ codes to AVTMH occupations
   			
   	Creates: AVTMH1976_cleaned.dta                    */
*************************************************************************************************

* Session setup: switch off output paging and work from the AVTMH 1976 folder,
* so every relative path below resolves from there.
set more off
cd "$Mydirectory1/1_DataSources/Americans_MentalHealth_1976/"

* Raw data: download from ICPSR (search ICPSR 7948); the file name is the same.
use "./RawData/07948-0001-Data.dta", clear

* ICPSR-provided clean-up syntax (also downloaded from ICPSR).
do ./code/07948-0001-Supplemental_syntax.do

* Lower-case the leading V of the raw variable names.
rename V* v*

* No weight is provided with the AVTMH 1976, so every respondent gets a weight of 1.
generate weight_AVTMH76 = 1

/*------------------------------------------------------------------------------------------

											CLEANING

------------------------------------------------------------------------------------------*/

***********************************************
* Demographics *
***********************************************

*****************
* Gender *
*****************

rename v772 sex
tabulate sex, missing

*****************
* Age *
*****************

rename v841 age
generate agesq = age * age

* Sample restriction: keep respondents aged 30 through 50 (both ends included).
keep if age >= 30 & age <= 50

******************
* Marital Status *
******************

* One 0/1 dummy per v131 code; each dummy is missing when v131 is missing.
* The two lists are paired position by position (dummy name <-> v131 code).
local status_vars  married separated widowed divorced never_married
local status_codes 1 2 4 3 5

forvalues k = 1/5 {
	local sv : word `k' of `status_vars'
	local sc : word `k' of `status_codes'
	generate `sv' = (v131 == `sc') if !missing(v131)
	tabulate `sv', missing
}


******************
* Race *
******************

/*NOTE:
       The categories of Mexican American, Puerto Rican, and Cuban 
       will be coded as "white" to match the GSS, which puts Latinx 
       individuals in this category. 
*/
* race: 1 when v773 is 1 or 4, 2 when v773 is 2, missing otherwise.
generate race = .
replace race = 1 if inlist(v773, 1, 4)
replace race = 2 if v773 == 2
* Missing therefore covers Asian American, Indian American, Native American, other and "."

tabulate race, missing
label variable race "Respondent's Race (re-coded as white, black, missing)"

generate black = (race == 2) if !missing(race)
tabulate black, missing
label variable black "Dummy =1 if Respondent is Black"

* Father's race is not available.

******************
* Foreign Born *
******************

* Respondent: birthplace codes above 200 are flagged as foreign born;
* the flag is missing when v729 is missing.
generate foreignborn = (v729 > 200) if !missing(v729)
tabulate foreignborn, missing

* Keep respondents who are not foreign born or whose birthplace is unknown.
drop if foreignborn == 1

* Father's birthplace is not available.

*father--not available

*---------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------*

*************************************************************************************
* State/Region Where Respondent Grew Up or Was Born *
*************************************************************************************

***********************
* State R was born in * 
***********************

* Build the merge key from the raw birthplace code: codes above 186 become missing,
* and the rest are shifted down by 100 to put them in the range of the crosswalk's sticpsr.
generate sticpsr = v729
replace sticpsr = . if sticpsr > 186
replace sticpsr = sticpsr - 100

* A few state codes still need fixing to match the ICPSR state codes in the crosswalk.
* Raw codes 140-148 (Alabama, Arkansas, Florida, Georgia, Louisiana, Mississippi,
* North Carolina, South Carolina, Texas) become 41-49, i.e. the raw code minus 99.
forvalues raw = 140/148 {
	replace sticpsr = `raw' - 99 if v729 == `raw'
}
replace sticpsr = 40 if v729 == 149   // Virginia
replace sticpsr = 82 if v729 == 181   // Hawaii
* Puerto Rico gets a temporary sticpsr of 88 (not used by any other state) so that
* its fips can be filled in after the merge.
replace sticpsr = 88 if v729 == 182
tabulate sticpsr, missing

* Set the survey data aside, then merge it onto the state-code crosswalk.
sort sticpsr
tempfile xwalk
save `xwalk'

use "../Crosswalks/statecodes_2FIPScrosswalk.dta", clear
drop v276
rename name statename_birth
sort sticpsr
merge 1:m sticpsr using `xwalk'

* Respondents carrying the temporary code 88 that found no crosswalk row:
* assign Puerto Rico's fips and name by hand, then blank out the temporary code.
replace fips = 72 if sticpsr == 88 & _merge == 2
replace statename_birth = "Puerto Rico" if fips == 72
replace sticpsr = . if fips == 72

* Crosswalk rows with no respondent attached are not needed.
drop if _merge == 1
drop _merge

generate bpl = fips
tabulate bpl, missing
label variable bpl "R Birthplace, FIPS codes"

***********************
* Region R was born in * 
***********************

* Four-category region of birth, assigned from the state FIPS code.
generate region4_born = .
	* 1 = Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
replace region4_born = 1 if inlist(fips, 9, 23, 25, 33, 44, 50, 34, 36, 42)
	* 2 = Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
replace region4_born = 2 if inlist(fips, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
	/* 3 = South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia,
	              Alabama, Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
replace region4_born = 3 if inlist(fips, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
	* 4 = West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington, Alaska, Hawaii
replace region4_born = 4 if inlist(fips, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)
tabulate region4_born, missing
label variable region4_born "Region R born"

* The merge helpers are dropped here; they are re-created for the childhood state below.
drop fips sticpsr

******************************************
* Whether R was born in the south *
******************************************

* 1 if born in the South, 0 if born in another region, missing if region of birth is unknown.
generate bornsouth = (region4_born == 3) if region4_born != .
tabulate bornsouth, missing

******************************************
* State and Region R grew up in * 
******************************************

* Same procedure as for the birthplace (v729), applied to v730, the state R grew up in:
* codes above 186 become missing and the rest are shifted down by 100 so that they are
* in the same range as the sticpsr variable in the crosswalk.
gen sticpsr = v730
replace sticpsr = . if sticpsr > 186
replace sticpsr = sticpsr - 100

* Fix a few state codes so that they match the ICPSR state codes in the crosswalk.
* This list mirrors the one used for v729, including the Hawaii recode, which was
* previously applied to the birthplace only.
replace sticpsr = 41 if v730==140 /*Alabama*/
replace sticpsr = 42 if v730==141 /*Arkansas*/
replace sticpsr = 43 if v730==142 /*Florida*/
replace sticpsr = 44 if v730==143 /*Georgia*/
replace sticpsr = 45 if v730==144 /*Louisiana*/
replace sticpsr = 46 if v730==145 /*Mississippi*/
replace sticpsr = 47 if v730==146 /*North Carolina*/
replace sticpsr = 48 if v730==147 /*South Carolina*/
replace sticpsr = 49 if v730==148 /*Texas*/
replace sticpsr = 40 if v730==149 /*Virginia*/
replace sticpsr = 82 if v730==181 /*Hawaii*/
replace sticpsr = 88 if v730==182 /*Giving Puerto Rico a temporary sticpsr so that the fips can be changed later. 88 is not given to any other sticpsr*/
tab sticpsr, m

* Set the survey data aside, then merge it onto the state-code crosswalk.
sort sticpsr
tempfile xwalk2
save `xwalk2'

use "../Crosswalks/statecodes_2FIPScrosswalk.dta", clear

drop v276
rename name statename_childhood
sort sticpsr
merge 1:m sticpsr using `xwalk2'

* Respondents carrying the temporary code 88 that found no crosswalk row:
* assign Puerto Rico's fips and name by hand, then blank out the temporary code.
replace fips = 72 if sticpsr==88 & _merge==2
replace statename_childhood = "Puerto Rico" if fips==72
replace sticpsr = . if fips==72

* Crosswalk rows with no respondent attached are not needed.
drop if _merge==1
drop _merge

gen state_childhood = fips
tab state_childhood, m
label var state_childhood "R state grew up in, FIPS codes"

* Four-category region R grew up in, assigned from the state FIPS code.
gen region4_childhood = .
	* Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
replace region4_childhood=1 if fips==9 | fips==23 | fips==25 | fips==33 | fips==44 | fips==50 | fips==34 | fips==36 | fips==42
	* Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
replace region4_childhood=2 if fips==17 | fips==18 | fips==26 | fips==39 | fips==55 | fips==19 | fips==20 | fips==27 | fips==29 | fips==31 | fips==38 | fips==46
	/* South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama,
			  Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
replace region4_childhood=3 if fips==10 | fips==11 | fips==12 | fips==13 | fips==24 | fips==37 | fips==45 | fips==51 | fips==54 | fips==1 | fips==21 | fips==28 | fips==47 | fips==5 | fips==22 | fips==40 | fips==48
	* West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington, Alaska, Hawaii
replace region4_childhood=4 if fips==4 | fips==8 | fips==16 | fips==30 | fips==32 | fips==35 | fips==49 | fips==56 | fips==6 | fips==41 | fips==53 | fips==2 | fips==15

label var region4_childhood "Region R grew up"

* The merge helpers are no longer needed.
drop fips sticpsr

******************************************
* Region R currently resides in * 
******************************************

* Region of current residence: consecutive pairs of v819 codes collapse into one region,
*   1-2 -> 1 (Northeast), 3-4 -> 2 (Midwest), 5-6 -> 3 (South), 7-8 -> 4 (West).
* Any other v819 value leaves region4 missing.
generate region4 = .
forvalues reg = 1/4 {
	replace region4 = `reg' if (v819 == 2*`reg' - 1 | v819 == 2*`reg')
}

label variable region4 "Region R resides in"

* All three region variables share one value label.
label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
foreach rv in region4 region4_born region4_childhood {
	label values `rv' region_l
}

tabulate region4_born, missing
tabulate region4_childhood, missing
tabulate region4, missing

********************************************************
* Whether R has moved region since birth or childhood * 
********************************************************

* moved_region compares R's current region with the region of birth and of childhood:
*   1 = current region differs from at least one known origin region
*   0 = current region equals every origin region that is known
*   . = cannot be determined (current region unknown, or both origin regions unknown)
* Previously a missing current region combined with a known origin region was coded 1,
* because a missing value compares as "not equal" to any region; it is now left missing.
gen moved_region = .

* Both origin regions known.
replace moved_region = (region4 != region4_born | region4 != region4_childhood) if !missing(region4, region4_born, region4_childhood)

* Only the region of birth known.
replace moved_region = (region4 != region4_born) if !missing(region4, region4_born) & missing(region4_childhood)

* Only the childhood region known.
replace moved_region = (region4 != region4_childhood) if !missing(region4, region4_childhood) & missing(region4_born)

tab moved_region, m

*---------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------*

*****************
* Employment *
*****************

* employed = 1 when v310 equals one of the codes listed below; every other value,
* including missing, gives 0.
generate employed = 0
foreach code of numlist 11 12 51 52 61 62 71 72 81 82 {
	replace employed = 1 if v310 == `code'
}
tabulate employed, missing

* selfemployed = 1 when v313 is 1, 0 for other non-missing values, missing when v313 is missing.
generate selfemployed = (v313 == 1) if !missing(v313)
tabulate selfemployed, missing

*******************************
* Education-Respondent *
*******************************

rename v749 higrade
tab higrade, m

* eduR: highest grade completed, collapsed into 7 ordered categories (0-6).
gen eduR = 0 if higrade==0                      /* none */
replace eduR=1 if higrade>=1 & higrade<8        /* some grade school */
replace eduR=2 if higrade==8                    /* completed 8th grade */
replace eduR=3 if higrade>8 & higrade<12        /* some HS */
replace eduR=4 if higrade==12                   /* 4 years of HS */
replace eduR=5 if higrade>12 & higrade<16       /* 1-3 years of college */
replace eduR=6 if higrade>15 & higrade<.        /* 4 or more years of college */
tab eduR, m

rename higrade yrsschool
label var yrsschool "Years of school"
tab yrsschool, m nol

* yrsschool_bin: one representative number of years for each education category.
* NOTE(review): values of 20 or more get eduR==6 above but are left missing here;
* confirm against the codebook whether such values occur and which treatment is intended.
gen yrsschool_bin = .
replace yrsschool_bin = 0 if yrsschool==0
replace yrsschool_bin = 6 if yrsschool>0 & yrsschool<8
replace yrsschool_bin = 8 if yrsschool==8
replace yrsschool_bin = 10 if yrsschool>8 & yrsschool<12
replace yrsschool_bin = 12 if yrsschool==12
replace yrsschool_bin = 14 if yrsschool>12 & yrsschool<16
replace yrsschool_bin = 16 if yrsschool>=16 & yrsschool<20
tab yrsschool_bin, m
label var yrsschool_bin "Years of school, binned"

* Attainment dummies, missing when eduR is missing.
* The condition names eduR in full: the earlier spelling "edu" only worked through
* variable-name abbreviation, which fails under -set varabbrev off- and turns
* ambiguous as soon as another variable starting with "edu" exists.
gen hs_ed = (eduR>=4) if eduR<.
tab hs_ed, m
label var hs_ed "HS educated"

gen coll_ed = (eduR>=6) if eduR<.
tab coll_ed, m
label var coll_ed "Coll educated"

*********************
* Education-Parents *
*********************

rename v743 dad_ed_raw
rename v747 mom_ed_raw

* Education categories (0-6) for each parent, built from the raw years of schooling.
* Raw values that fit none of the ranges below leave the category missing.
foreach parent in mom dad {
	local raw `parent'_ed_raw
	generate edu_`parent' = .
	replace edu_`parent' = 0 if `raw' == 0                  // none
	replace edu_`parent' = 1 if `raw' > 0 & `raw' < 8       // some grade school
	replace edu_`parent' = 2 if `raw' == 8                  // completed 8th grade
	replace edu_`parent' = 3 if `raw' > 8 & `raw' < 12      // some HS
	replace edu_`parent' = 4 if `raw' == 12                 // 4 years of HS
	replace edu_`parent' = 5 if `raw' > 12 & `raw' < 16     // some college
	replace edu_`parent' = 6 if inlist(`raw', 16, 17)       // college
	label variable edu_`parent' "Educational categories for `parent'"
	tabulate edu_`parent', missing
}

* Binned years of school plus high-school / college dummies for each parent.
foreach parent in mom dad {
	local cat edu_`parent'

	* Categories 0,1,...,6 map, in order, to 0, 6, 8, 10, 12, 14, 16 years.
	generate `cat'_bin = .
	local level = 0
	foreach yrs of numlist 0 6 8 10 12 14 16 {
		replace `cat'_bin = `yrs' if `cat' == `level'
		local ++level
	}
	tabulate `cat'_bin, missing
	label variable `cat'_bin "`parent' Years of school from bins"

	generate `parent'_hs_ed = (`cat' >= 4) if !missing(`cat')
	tabulate `parent'_hs_ed, missing

	generate `parent'_coll_ed = (`cat' >= 6) if !missing(`cat')
	tabulate `parent'_coll_ed, missing

	label variable `parent'_hs_ed "`parent' HS educated"
	label variable `parent'_coll_ed "`parent' College educated"
}

* The raw variables are kept under their final names.
rename dad_ed_raw yrsschool_dad
rename mom_ed_raw yrsschool_mom

*************************
* R's hh size *
*************************

* Runs a companion do-file on the data currently in memory. Judging by the section heading
* and the file name it builds R's household-size variable(s); its contents are not shown here,
* so the variables it adds or changes should be checked in that file.
do ./code/1a_AVTMH1976_R_hhsize.do

******************
* Unions *
******************

* not available

*******************
* Veterans *
*******************

* not available

/*------------------------------------------------------------------------------------------

Crosswalking 1976 AVTMH  occupations to 1950 ANES occupations for fathers/mothers

------------------------------------------------------------------------------------------*/

***********************
*		      *
*    Crosswalking     *
* 		      *
***********************

tabulate v740, missing   /* father occ */

* The crosswalk is keyed on census1970, so the father occupation code is copied under that name.
generate census1970 = v740
label variable census1970 "==dad_occ (renamed to facilitate a merge)"

sort census1970

merge m:1 census1970 using "../Crosswalks/Crosswalk_1970Census_toANES.dta"

* Respondents without a crosswalk match must be those with a missing occupation code.
assert census1970 == . if _merge == 1

* Crosswalk rows that no respondent uses are discarded.
keep if _merge != 2
drop _merge

*************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials *
*************************************************************************
/* fatheroccej (first referenced after the merge, so presumably supplied by the crosswalk)
   is switched from 28 to 21 when v742==1. v742 is presumably the self-employment flag that
   goes with the v740 occupation -- confirm against the codebook.
   Per the original note, cases that were both self-employed AND worked a job for someone
   else are not recoded. */
replace fatheroccej = 21 if v742 == 1 & fatheroccej == 28

*********************************************************
*********************************************************
* Correct dad occ + make mother occ & binary indicators *
*********************************************************
*********************************************************

* Distribution of v745 (who the head of household was when R was growing up) among
* respondents that have a crosswalked occupation.
tab v745 if fatheroccej!=., m

* Keep an untouched copy of the crosswalked occupation before it is split by gender of the head.
clonevar fatheroccej_nochange = fatheroccej
label var fatheroccej_nochange "father or head of hh occupation--unchanged since merge with ANES occs"

//fix fatheroccej so that it's ONLY the occs of male heads. (i.e., not necessarily his dad)
// v745 values 3 and 4 are treated as female heads throughout this section.
replace fatheroccej =. if (v745==3 | v745==4)
// Check: fatheroccej IS missing whenever 1) we have no info about the head of hh's occ and/or 2) the head was a female.
// (The original author reports that this assertion holds.)
assert fatheroccej==. if fatheroccej_nochange==. | (v745==3 | v745==4)

// Make motheroccej--will be non-missing if R's head of hh when growing up was a FEMALE (i.e., not necessarily his mom)
gen motheroccej =.
replace motheroccej = fatheroccej_nochange if inlist(v745,3,4)
tab motheroccej,m 
// Check: motheroccej IS missing whenever 1) we have no info about the head of hh's occ and/or 2) v745 is 0, 1, 2, 9 or missing
// (head was a male, or the gender of the head is not known). The original author reports that this assertion holds.
assert motheroccej==. if fatheroccej_nochange==. | inlist(v745,0,1,2,9,.)

	* One dummy per distinct non-missing value of v745; tab, gen() numbers them in ascending order of value.
	* NOTE(review): the renames below assume the first four values of v745 are, in order, father,
	* other male, other female and mother. If v745 contains a smaller value (the assertion above
	* lists 0 as a possible code), the numbering shifts and the names/labels would be wrong -- confirm
	* against the tab output.
	tab v745, gen(headofhh_)
	ren headofhh_1 headofhh_father
	ren headofhh_2 headofhh_othermale
	ren headofhh_3 headofhh_otherfemale
	ren headofhh_4 headofhh_mother
	
	* Check: each dummy is missing when v745 is missing.
	foreach name in father othermale otherfemale mother {
		assert headofhh_`name' ==. if v745==.
	}
	
	label var headofhh_father "Head of hh when R was growing up was R's father"
	label var headofhh_othermale "Head of hh when R was growing up was some other male (not R's father)"
	label var headofhh_otherfemale "Head of hh when R was growing up was some other female (not R's mother)"
	label var headofhh_mother "Head of hh when R was growing up was R's mother"

	//create alternate dummy for hh head being R's father. When R reports occupation of a parent but it is not specified who the hh head occupation question was asked about, will assume that R lived with father.	
	gen headofhh_father_imputed = headofhh_father
	* Set to 1 when an occupation was crosswalked but v745 is missing.
	replace headofhh_father_imputed =1 if fatheroccej_nochange!=. & v745==.
	tab headofhh_father_imputed,m 
	* Same tabulation restricted to father occupation code 997.
	tab headofhh_father_imputed if v740==997, m 
	label var headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 16"


*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************
	* father_notworking:
	*   1 = v740 is 996 or 998, no father occupation, and v745 is 1, 2 or missing
	*   0 = a father occupation is present and v745 is 1 or 2
	*   . = otherwise
	generate father_notworking = .
	replace father_notworking = 1 if inlist(v740, 996, 998) & fatheroccej == . & (inlist(v745, 1, 2) | v745 == .)
	replace father_notworking = 0 if fatheroccej != . & inlist(v745, 1, 2)
	tabulate father_notworking, missing

	* mother_notworking:
	*   1 = no mother occupation and either (v745 is 3 or 4 and v740 is 996, 997 or 998)
	*       or (v745 is missing and v740 is 997)
	*   0 = a mother occupation is present and v745 is 3 or 4
	*   . = otherwise
	generate mother_notworking = .
	replace mother_notworking = 1 if motheroccej == . & inlist(v745, 3, 4) & inlist(v740, 996, 997, 998)
	replace mother_notworking = 1 if motheroccej == . & v745 == . & v740 == 997
	replace mother_notworking = 0 if motheroccej != . & inlist(v745, 3, 4)
	tabulate mother_notworking if v740 == 997, missing
	tabulate mother_notworking, missing

* fatherfarm: 1 when the father occupation is 71 or 81, 0 for any other occupation,
* missing when there is no father occupation.
generate fatherfarm = inlist(fatheroccej, 71, 81) if fatheroccej != .
tabulate fatherfarm, missing

* The crosswalk is merged a second time for R's own occupation and brings in a variable
* called fatheroccej again, so the father's version is moved out of the way first.
rename fatheroccej father_occ1950ej

* census1970 currently equals dad_occ; it is dropped so it can be rebuilt from R's occupation.
drop census1970

/*------------------------------------------------------------------------------------------

Crosswalking 1976 AVTMH occupations to 1950 ANES occupations for sons/daughters

------------------------------------------------------------------------------------------*/

***********************
*		        *
*    Crosswalking     *
* 		        *
***********************

tab v311, m /*R occ*/

* The crosswalk is keyed on census1970, so R's occupation code is copied under that name.
gen census1970=v311
label var census1970 "==child_occ (renamed to facilitate a merge)"

merge m:1 census1970 using "../Crosswalks/Crosswalk_1970Census_toANES.dta"

* Respondents without a crosswalk match must be those with a missing occupation code.
* (The earlier form, "assert census1970 if _merge==1", only tested that the code was
* non-zero, so it passed for ANY unmatched occupation code and verified nothing.)
assert census1970==. if _merge==1

* Crosswalk rows that no respondent uses are discarded.
drop if _merge==2
drop _merge


*************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials *
*************************************************************************
* At this point fatheroccej holds R's OWN crosswalked occupation; the variable name is the
* one brought in by the crosswalk and is renamed to occRej just below.
//Will not recode if R is both self-employed and works for someone else in another job.
replace fatheroccej=21 if fatheroccej==28 & selfemployed==1

*renaming to match other data files
rename fatheroccej occRej
label var occRej "child_occ"

* Restore the father's occupation (set aside before this merge) under its usual name.
rename father_occ1950ej fatheroccej
label var fatheroccej "dad_occ"

drop census1970

/*------------------------------------------------------------------------------------------
		Family Income							
------------------------------------------------------------------------------------------*/
*logged family income (value)--family income is not continuous in this survey

rename v766 totfam_inc

/*
Family income is reported in bins, so each bin is given a dollar value:
  - interior bins get their midpoint;
  - the bottom bin ("less than 4000") gets 0.75 times its top value, i.e. 3000;
  - the open-ended top bin ("35,000 or more") gets 1.25 times its bottom value, i.e. 43750.
Codes of totfam_inc outside 1-18 leave fam_inc missing.
*/
generate fam_inc = .
replace fam_inc = 3000  if totfam_inc >= 1  & totfam_inc <= 4    // less than 4k
replace fam_inc = 5000  if totfam_inc >= 5  & totfam_inc <= 6    // 4-6k
replace fam_inc = 7000  if totfam_inc >= 7  & totfam_inc <= 8    // 6-8k
replace fam_inc = 9000  if totfam_inc >= 9  & totfam_inc <= 10   // 8-10k
replace fam_inc = 11250 if totfam_inc >= 11 & totfam_inc <= 12   // 10-12.5k
replace fam_inc = 13750 if totfam_inc == 13                      // 12.5-15k
replace fam_inc = 16250 if totfam_inc == 14                      // 15-17.5k
replace fam_inc = 18750 if totfam_inc == 15                      // 17.5-20k
replace fam_inc = 22500 if totfam_inc == 16                      // 20-25k
replace fam_inc = 30000 if totfam_inc == 17                      // 25-35k
replace fam_inc = 43750 if totfam_inc == 18                      // 35k and above
tabulate fam_inc, missing

label variable fam_inc "Family income, binned (based on midpoints of each bin)"

* Flags for the bottom and top bins. The "_son" suffix matches variable names in other
* datasets; all respondents (male and female) get a value.
generate bottomcoded_son = (fam_inc == 3000) if !missing(fam_inc)
tabulate bottomcoded_son, missing

generate topcoded_son = (fam_inc == 43750) if !missing(fam_inc)
tabulate topcoded_son, missing

label variable bottomcoded_son "Respondent family income, bottom coded"
label variable topcoded_son "Respondent family income, top coded"

/* Deflate to 1950 dollars with the CPI (https://data.bls.gov/timeseries/CUUR0000SA0).
   The two index values are stored as constant variables and stay in the dataset. */
generate CPI1950 = 24.1
generate CPI1976 = 56.9

generate fam_inc_real = fam_inc * (CPI1950/CPI1976)
label variable fam_inc_real "Binned Family income, in 1950 dollars"

generate lnfaminc = ln(fam_inc_real)
label variable lnfaminc "Logged family income, binned and real"

/*------------------------------------------------------------------------------------------
	Birth cohorts							
------------------------------------------------------------------------------------------*/

generate year = 1976
label variable year "Survey year"

generate dob = v728
tabulate dob, missing
label variable dob "Year of birth"

* Decade of birth for the 1920s, 1930s and 1940s; other years of birth stay missing.
generate decade = .
forvalues start = 1920(10)1940 {
	replace decade = `start' if inrange(dob, `start', `start' + 9)
}
tabulate decade, missing
label variable decade "Decade of birth"

* One dummy (decade_1, decade_2, ...) per decade present in the data.
tabulate decade, generate(decade_)


/*------------------------------------------------------------------------------------------

									Interactions			

------------------------------------------------------------------------------------------*/ 

	* Variables that get a demeaned copy (name plus the suffix _dm).
	global institution_list "black hs_ed coll_ed"

	* For each variable: show its summary, subtract the mean just computed,
	* and carry the original variable label over with ", demeaned" appended.
	foreach v of global institution_list {
		summarize `v'
		generate `v'_dm = `v' - `r(mean)'
		local lbl : variable label `v'
		label variable `v'_dm "`lbl', demeaned"
	}

/*------------------------------------------------------------------------------------------

										Save			

------------------------------------------------------------------------------------------*/ 

* Indicator for missing real family income (always 0 or 1, never missing).
generate faminc_missing = (fam_inc_real == .)
label variable faminc_missing "Family income missing"

* v4 is the unique identifier for each observation in this dataset.
rename v4 id_avtmh76
label variable id_avtmh76 "SEQUENCE NUMBER (unique identifier)"

* Report of duplicate ids; the original author notes that none were reported.
duplicates report id_avtmh76

* Shrink storage types, sort by id, lower-case the _AVTMH76 suffix,
* put the id and weight first, and write the cleaned file.
compress
sort id_avtmh76
rename *_AVTMH76 *_avtmh76
order id_avtmh76 weight_avtmh76
save "./output/AVTMH1976_cleaned.dta", replace
